require(quanteda)
require(stringi)
require(ggplot2)
require(ggrepel)

# Lookup vectors defined at file level. None of them is referenced by the
# functions visible in this file; presumably they are used by the calling
# scripts (e.g. for token filtering) -- TODO confirm.

# Full English month names followed by their three-letter abbreviations.
# Base R already ships both as constants, so no hand-typed list or string
# slicing is needed ("May" intentionally appears twice, as before).
month <- c(month.name, month.abb)
day <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
title <- c("Mr", "Ms", "Mrs", "Dr", "Gen")

# Lower-case two-letter country codes, split into two groups.
# NOTE(review): these look like ISO 3166-1 alpha-2 codes -- confirm.
country_major <- c("de", "ru", "gb", "fr", "jp", "cn", "es")
country_minor <- c("ir", "iq", "af", "sy", "vn", "cu", "ca", "mx")

# Compute the yearly threat index ("gti") per country.
#
# For every year the index of a country is the number of rows of that country
# flagged as a threat divided by the total number of rows of that year
# (all countries and all threat values together).
#
# Arguments:
#   x        data frame with columns `year`, `country.class` and `threat`
#            (`threat` must produce the label "TRUE" for flagged rows, e.g. a
#            logical column).
#   country  countries to report; NULL (default) means every country present
#            in `x` from 1861 onwards.
#
# Value:
#   With several countries: a data frame with columns `year` (numeric),
#   `country` (character) and `gti`, one row per year/country pair for the
#   years 1861-2017. With a single country the table subscript drops to a
#   plain named vector (names are the years) and that vector is returned
#   as is. Years without any row yield NaN (0 / 0).
#
# Rows before 1861 are removed; years after 2017 fall outside the factor
# levels and are therefore not tabulated.
get_gti <- function(x, country = NULL) {

    x <- subset(x, year >= 1861)
    if (is.null(country))
        country <- unique(x$country.class)
    # Always index the table by label, even if a factor was supplied
    country <- as.character(country)
    x$year_factor <- factor(x$year, seq(1861, 2017))

    # Guarantee a "TRUE" slice even when no row is flagged; otherwise the
    # subscript below fails with "subscript out of bounds". Existing labels
    # are kept, so the yearly totals are unchanged.
    threat <- factor(x$threat)
    x$threat <- factor(threat, levels = union(levels(threat), "TRUE"))

    tb_year <- xtabs(~ year_factor + country.class + threat, x)
    # Rows per year across all countries and threat values
    total <- rowSums(tb_year)
    # `total` is recycled down the columns, i.e. each year is divided by its own total
    tb_year <- tb_year[, country, "TRUE"] / total
    if (!is.table(tb_year))
        return(tb_year)
    result <- as.data.frame(tb_year)
    colnames(result) <- c("year", "country", "gti")
    result$year <- as.numeric(as.character(result$year))
    result$country <- as.character(result$country)
    return(result)
}

# Smooth a yearly index with a Daniell kernel.
#
# Arguments:
#   x      data frame with columns `year`, `country` and the column named
#          by `index`.
#   m      kernel dimension(s) handed to kernel("daniell", m).
#   sum    if TRUE, the countries are added up per year first and the single
#          resulting series is labelled "WORLD".
#   index  name of the column of `x` to smooth.
#
# Value:
#   Data frame with columns `year` (numeric), `country` (character) and
#   `index` (smoothed value). kernapply() runs in its default non-circular
#   mode, so the output covers fewer years than the input.
smooth_gti <- function(x, m = c(1, 1) , sum = FALSE, index = "gti") {
    # Copy the requested column under a fixed name so the formula can use it
    x[["index"]] <- x[[index]]

    # year x country table of summed index values
    series <- xtabs(index ~ year + country, data = x)
    if (sum) {
        world <- as.matrix(rowSums(series))
        colnames(world) <- "WORLD"
        series <- world
    }

    daniell <- kernel("daniell", m)
    smoothed <- kernapply(series, daniell)

    out <- as.data.frame(as.table(smoothed))
    names(out) <- c("year", "country", "index")
    # as.data.frame() yields factors for both dimensions; convert them back
    out[["year"]] <- as.numeric(as.character(out[["year"]]))
    out[["country"]] <- as.character(out[["country"]])
    out
}

# Plot the smoothed index of one or more countries over time, optionally
# annotating selected years with event labels.
#
# Arguments:
#   x        data frame with columns `year`, `country` and the column named
#            by `index` (it is passed to smooth_gti()).
#   country  countries to plot; NULL (default) plots all countries in `x`.
#   event    named list keyed by country; each element is a named list/vector
#            mapping an event name to a year, e.g.
#            list(ru = list("Some event" = 1950)). Keys are upper-cased before
#            matching, so with `sum = TRUE` the key must be "world".
#            May be omitted, NULL or empty, in which case nothing is labelled.
#   min_y, max_y  limits of the y axis.
#   nudge_y  vertical offset of the event labels.
#   m, sum, index  forwarded to smooth_gti().
#   add      if TRUE, return only a line layer to be added to an existing
#            plot instead of a complete plot.
#
# Value:
#   A ggplot object, or a single geom_line layer when `add = TRUE`.
#
# The styling is black and white (countries differ by line type); for a
# colour version map `colour = Country` in the layers instead.
plot_gti <- function(x, country = NULL, event, min_y = 0, max_y = 0.20, 
                     nudge_y = 0.05, m = c(1, 1), sum = FALSE, index = "gti", add = FALSE) {
    if (missing(event) || is.null(event))
        event <- list()
    if (is.null(country))
        country <- unique(x$country)
    x <- x[x$country %in% country,]
    temp <- smooth_gti(x, m = m, sum = sum, index = index)
    temp$Index <- temp$index
    temp$Year <- temp$year
    temp$Country <- stri_trans_toupper(temp$country)

    # One small frame per country key, bound together in a single step.
    # (The iteration variable is deliberately not called `m`, which is the
    # kernel argument of this function.)
    label <- lapply(names(event), function(key) {
        year <- unlist(event[[key]])
        if (length(year) == 0L)
            return(NULL)
        if (is.null(names(year)))
            stop("events for '", key, "' must be named", call. = FALSE)
        data.frame(Country = stri_trans_toupper(key),
                   Year = year,
                   Name = names(year),
                   row.names = NULL,
                   stringsAsFactors = FALSE)
    })
    label <- do.call(rbind, label)
    if (is.null(label)) {
        # Keep the columns the plot layers rely on even without any event
        label <- data.frame(Country = character(),
                            Year = numeric(),
                            Name = character(),
                            stringsAsFactors = FALSE)
    }

    # Left join on the shared columns (Year, Country); unlabelled years get Name = NA
    temp <- merge(temp, label, all.x = TRUE)
    if (add) {
        # x, y and group are expected to be inherited from the plot this layer is added to
        geom_line(data = temp, na.rm = TRUE, colour = "black", aes(linetype = Country))
    } else {
        ggplot(temp, aes(x = Year, y = Index, group = Country)) +
            ylim(min_y, max_y) +
            geom_line(na.rm = TRUE, colour = "black", aes(linetype = Country)) +
            # Mark only the years that carry an event; NA_real_ keeps the
            # aesthetic numeric even when no year is labelled
            geom_point(aes(y = ifelse(is.na(Name), NA_real_, Index)), na.rm = TRUE) +
            geom_text_repel(aes(x = Year, y = Index, label = Name),
                            na.rm = TRUE,
                            min.segment.length = 0.5,
                            nudge_y = nudge_y,
                            force = 10, show.legend = FALSE,
                            segment.alpha = 0.3, direction = "y") +
            ylab("Threat index") +
            scale_x_continuous(limits = c(1860, 2020), breaks = seq(1860, 2020, 20)) +
            theme_light() +
            theme(text = element_text(size = 14),
                  legend.position = "top",
                  legend.text = element_text(size = 14),
                  axis.text = element_text(size = 14),
                  axis.title.x = element_blank(),
                  axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
                  panel.grid.major = element_blank(), 
                  panel.grid.minor = element_blank())
    }
}

# Correlate the threat index with other measures, country by country.
#
# Arguments:
#   x        data frame with columns `country`, `gti` and one column per
#            entry of `measure`.
#   country  countries to analyse; they become the row names of the result.
#   measure  names of the columns of `x` to correlate with `gti`.
#
# Value:
#   Data frame with one row per country and one column per measure holding
#   the correlation estimate from cor.test(). Each column carries the
#   matching p-values in its "p.value" attribute. A cell is NA when the
#   country has fewer than three complete (gti, measure) pairs, which is the
#   minimum cor.test() accepts.
cor_index <- function(x, country, measure = c("mids", "cinc", "gc", "ls", "un", "lrm_prob")) {
    absent <- setdiff(c("country", "gti", measure), names(x))
    if (length(absent) > 0)
        stop("column(s) missing from 'x': ", paste(absent, collapse = ", "), call. = FALSE)

    result <- data.frame(row.names = country)
    for (m in measure) {
        estimate <- p <- rep(NA_real_, length(country))
        for (i in seq_along(country)) {
            # Plain indexing instead of subset(): the loop variables can no
            # longer be mistaken for columns of `x`, and rows with a missing
            # country are skipped
            rows <- which(x[["country"]] == country[i])
            index <- x[["gti"]][rows]
            other <- x[[m]][rows]
            # cor.test() drops incomplete pairs itself but stops when fewer
            # than three remain; treat that like the all-missing case
            if (sum(!is.na(index) & !is.na(other)) >= 3L) {
                stat <- cor.test(index, other)
                estimate[i] <- unname(stat$estimate)
                p[i] <- unname(stat$p.value)
            }
        }
        attr(estimate, "p.value") <- p
        result <- cbind(result, estimate)
    }
    colnames(result) <- measure
    return(result)
}

# Turn a correlation table into a printable table with significance marks.
#
# Arguments:
#   x  data frame whose columns each carry a "p.value" attribute (in this
#      file cor_index() produces such columns).
#
# Value:
#   Data frame in which every column of `x` is followed by a column of
#   significance symbols, with rows ordered by row name and a final
#   "average" row appended.
format_cor <- function(x) {
    result <- list()
    for (i in seq_along(x)) {
        # Odd slots hold the estimates, even slots the matching symbols
        result[[i * 2 - 1]] <- x[[i]]
        # Map p-values to symbols: "***" up to 0.001, "**" up to 0.01,
        # "*" up to 0.05, "." up to 0.1 and a blank above that
        result[[i * 2]] <- symnum(
            attr(x[[i]], "p.value"), corr = FALSE,
            cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
            symbols = c("***","**","*","."," ")
        )
    }
    result <- data.frame(result)
    rownames(result) <- rownames(x)
    # Estimate columns keep the names of `x`; symbol columns get an empty name
    colname <- character(ncol(result))
    colname[seq_along(x) * 2 - 1] <- colnames(x)
    colnames(result) <- colname
    # Order the rows alphabetically by row name
    result <- result[sort(rownames(result)),]
    # Summary row: mean (ignoring NA) for non-character columns, empty string
    # for character columns.
    # NOTE(review): this assumes the symbol columns are the character ones -- confirm.
    result <- rbind(result, "average" = lapply(result, function(x) {
                if (is.character(x)) "" else mean(x, na.rm = TRUE)
           }))
    return(result)
}

# Draw several yearly series on one base-graphics plot after standardising
# and smoothing each of them.
#
# Arguments:
#   x     matrix-like object with one series per named column; the values
#         extracted from a column must be named by year, because the x
#         positions are read from those names.
#   xlim  range of the x axis; ticks are drawn every 10 units.
#
# For every column the zero entries are dropped, the remainder is
# standardised with scale() and smoothed with a Daniell kernel of
# dimensions c(1, 1). Each series gets its own line type and plotting
# symbol, and the symbol marks the first and the last point of the line.
plot_comparison <- function(x, xlim = c(1860, 2020)) {
    # Empty canvas; the x axis is added manually at the end
    plot(NULL, type = "n", xlim = xlim, ylim = c(-2, 3), 
         ylab = "Normalized score", xaxt = "n")

    smoother <- kernel("daniell", c(1, 1))
    for (j in seq_along(colnames(x))) {
        series <- x[, j]
        nonzero <- series[series != 0]
        # scale() returns a one-column matrix; rowSums() flattens it while
        # keeping the names
        standardised <- rowSums(scale(nonzero))
        smoothed <- kernapply(standardised, smoother)
        years <- as.numeric(names(smoothed))

        last <- length(smoothed)
        points(years[1], smoothed[1], pch = j)
        points(years[last], smoothed[last], pch = j)
        lines(years, smoothed, lty = j, lwd = 1)
    }

    style <- seq_len(ncol(x))
    legend("topright", stri_trans_toupper(colnames(x)), 
           lty = style, pch = style,
           col = 1, horiz = FALSE)
    axis(1, seq(xlim[1], xlim[2], by = 10))
}
